*************
**# A. Setup
*************

*****************************
**## A.1 SIC imports weights
*****************************
use "raw/xm_sic87_72_105_20120424", clear
drop if missing(sic)
keep if year == 1972

*EEC at time of Tokyo (9 countries): Belgium, France, West Germany, Italy,
*Luxembourg (same as Belgium in import data), Netherlands, Denmark, Ireland, United Kingdom
*Source: https://en.wikipedia.org/wiki/European_Economic_Community
gen eec = 1 if inlist(wbcode, "BEL", "DEU","FRA")
replace eec = 1 if inlist(wbcode,"ITA","IRL","NLD","DNK","GBR")
replace wbcode="EEC" if eec==1
collapse (sum) x, by(sic wbcode)

preserve
	drop sic
	collapse (sum) x, by(wbcode)
	egen x_w=total(x)
	gen sh=x/x_w
	gsort -sh
	drop x*
	* can 28%
	* eec 21%
	* jpn 8%
	* mex 4.5%
	* bra 3%
	* ven 2%
	* aus 2%
	* zaf 1.5%
	* esp 1.5%
	* che 1.3%
restore		
	
*Share of US world exports going to CAN/EEC/JPN
bys sic: egen x_world=total(x)
gen keeper = 1 if inlist(wbcode, "CAN", "JPN", "EEC")
bys sic: egen ks=total(keeper)
assert ks>=1
keep if keeper==1
drop keeper ks

* US exports all SIC codes to at least one of EEC, CAN, JPN
bys sic: egen x_other=total(x)
gen sh_other = x_other/x_world 
	* Mean and median around 55%. 25-75 is 46%-68%. 10-90 is 27%-81%.

*Create Shares for each importer-sic pair
gen     export_share = x/x_other
replace export_share = 1/3 if x_world==0
assert x_other>0 if export_share==0
assert x_world==0 if sh_other==.
gen export_share_can = export_share*(wbcode=="CAN")
gen export_share_jpn = export_share*(wbcode=="JPN")
gen export_share_eec = export_share*(wbcode=="EEC")

collapse (max) export_share_* sh_other, by(sic)
label var sh_other         "Share of 1972 US SIC exports to CAN/EEC/JPN"
label var export_share_can "Share of 1972 US SIC exports to CAN/EEC/JPN that go to CAN"
label var export_share_eec "Share of 1972 US SIC exports to CAN/EEC/JPN that go to EEC"
label var export_share_jpn "Share of 1972 US SIC exports to CAN/EEC/JPN that go to JPN"

save "processing/can_eec_jpn_export_share_1972", replace


*****************************
**# B. Japan and EEC Tariffs
*****************************

*********************
**## B.1 Get tariffs
*********************
	
****************
**### B.1.1 EEC
****************
import excel "raw/eec_tokyo_cccn.xlsx", clear firstrow

*For cases with additional levies (L), additional duties on sugar (ads),
*variable compoents (vc), we are assigning the base ad valorem rate
replace BaseRate = subinstr(BaseRate,"%+ads","",1)
replace FinalRate = subinstr(FinalRate,"%+ads","",1)
replace FinalRate = subinstr(FinalRate,"%+2% ads","",1)

replace BaseRate = subinstr(BaseRate,"%+VC","",1)
replace FinalRate = subinstr(FinalRate,"%+VC","",1)

replace BaseRate = subinstr(BaseRate,"%+L","",1)
replace FinalRate = subinstr(FinalRate,"%+L","",1)

replace BaseRate = subinstr(BaseRate,"(AB)","",1)
replace FinalRate = subinstr(FinalRate,"(AB)","",1)

replace BaseRate = subinstr(BaseRate,"(NB)","",1)
replace FinalRate = subinstr(FinalRate,"(NB)","",1)

replace BaseRate = subinstr(BaseRate,"NB","",1)
replace FinalRate = subinstr(FinalRate,"NB","",1)

*Throwing out specific tariffs. 42 of these
drop if regexm(BaseRate,"UA per") == 1 |  regexm(FinalRate,"UA per") == 1
drop if regexm(BaseRate,"UA each") == 1 |  regexm(FinalRate,"UA each") == 1
drop if BaseRate == "DM 10 per 1000 kg net"

replace Base = "0" if regexm(BaseRate,"free") | regexm(BaseRate,"Free")
replace Final = "0" if regexm(Final,"free") | regexm(Final,"Free")

replace Base = subinstr(Base,",",".",1)
replace Final = subinstr(Final,",",".",1)
replace Final = trim(Final)
destring Base Final, replace
drop if Base == . | Final == .

*Change in decimal places due to recording as %
replace Base = Base*100 if BaseRate < 1 & _n <= 200
*Some cases with Base = 1 & Final = .9
replace Final = Final*100 if FinalRate < 1 & Base > 1

collapse Base Final, by(CCCN)
rename CCCN cccn

save "processing/eec_cccn_cleaned", replace
	
******************
**### B.1.2 Japan
******************
import excel "raw/japan_tokyo_cccn.xlsx", clear firstrow

*Throwing out compound and specific tariffs. Only 97 of these out of 2587
drop if regexm(BaseRate,"yen") == 1 | regexm(BaseRate,"waste") == 1 ///
	| regexm(BaseRate,"sen") == 1  | regexm(FinalRate, "yen") == 1

replace Base = "0" if regexm(BaseRate,"free") | regexm(BaseRate,"Free")
replace Final = "0" if regexm(FinalRate,"free") | regexm(FinalRate,"Free")

drop if  Base == "9.999.0" | Base == "9999"
destring Base Final, replace
drop if Base == . | Final == .

collapse Base Final, by(cccn)
replace Base = Base*100
replace Final = Final*100

save "processing/jpn_cccn_cleaned", replace


**********************
**## B.2 Concord to HS
**********************
* Assign zero change to all HS codes that don't have a corresponding CCCN
foreach country in eec jpn {
use "processing/`country'_cccn_cleaned", clear
merge 1:m cccn using "processing/cccn_hs_wits"
	*EEC
		* m=1: cccn codes that not in concordance (18 obs)
		* m=2: cccn codes that in cocnordance but not in our cccn tariff data 
		* (1758 obs and 786 unique cccn codes)
	*JPN	
		* m=1: cccn codes that not in concordance (1 obs) 
		* m=2: cccn codes that in cocnordance but not in our cccn tariff data 
		* (1876 obs and 796 unique cccn codes)

drop if _m == 1
gen tariff_change = FinalRate-BaseRate
replace tariff_change = 0 if _m == 2
drop _merge
keep tariff_change hs
save "processing/hs_`country'_tariffs", replace
}

************************
**## B.3 Concord to SIC
************************

foreach country in eec jpn {

use "processing/hs_`country'_tariffs.dta", clear

mmerge hs using "processing/hs_sic_other_tariff_conc.dta", type(1:n)
assert _m != 1
keep if _m == 3
drop _m

egen scale = total(hs_sic_weight), by(sic)
assert inrange(scale,0,1)
replace hs_sic_weight = hs_sic_weight/scale

gen `country'_observed_change = tariff_change*hs_sic_weight

collapse (sum) `country'_observed_change, by(sic)
drop if regexm(sic,"X") == 1
destring sic, replace

replace `country'_observed_change = `country'_observed_change/8

keep `country'_observed_change sic
label var `country'_observed_change "(1/8)*(eec_final_tariff-eec_base_tariff), Approach `approach'"

save "processing/sic_`country'_tariffs.dta", replace

}

**********************
**# C. Canada Tariffs
**********************
	
*********************	
**## C.1 Get tariffs
*********************
import excel "raw/Trefler_Annual_Level_Transform.xls", sheet("Sheet1") firstrow clear
	*sic4 is the 1980 Canadian 4-digit SIC
rename sic4 can_sic
xtset can_sic year
keep can_sic year t*
replace ta = ta*100
gen end_ta = f7.ta
keep if year == 1980
keep can_sic year ta end_ta
	
	
*************************************	
**## C.2 Concord from CSIC to US SIC
*************************************
merge 1:m can_sic using "processing/us_sic_condorance"
assert _m!=1
	* m=2: 14 unique can sic codes in the concordance that not in trefler tariff data
keep if _merge == 3
drop _merge

*Rescaling to account for SIC codes not in the concordance
egen scale = total(weight), by(us_sic)
replace weight = weight/scale
replace ta = ta*weight
replace end_ta = end_ta*weight

collapse (sum) ta end_ta, by(us_sic)

rename ta can_tariff
rename end_ta end_can_tariff

*Collapse within SIC4
gen sic4 = substr(us_sic,1,4)
collapse can_tariff end_can_tariff, by(sic4)

rename sic4 sic
label var sic "US SIC" 
label var can_tariff "CAN Tariffs Against USA, 1980"

destring sic, replace

*Annualizing the change: note we are scaling to account for the fact that we only
*see 80-87, not 79-87	
gen can_observed_change     = (end_can_tariff-can_tariff)/7
gen can_observed_change_AVE = (ln(1+end_can_tariff)-ln(1+can_tariff))/7

keep can_observed_change sic
save "processing/Trefler_Tariffs_1980", replace

	
****************************************
**# D. Weighted average foreign tariffs 
****************************************
use "processing/sic_jpn_tariffs", clear
merge 1:1 sic using "processing/sic_eec_tariffs"

drop _merge 

merge 1:1 sic using "processing/Trefler_Tariffs_1980"
	* m=1: 63 sic codes with jpn or eec tariffs but not can tariff
	* m=2: 73 sic codes with can tariff but no tariff for jpn or eec
drop _merge
keep if sic >= 2000 & sic < 4000
	* within manuf, missing tariffs for jpn on 117 sic codes, eec on 116 sic codes, can on 4 sic codes
merge 1:1 sic using "processing/can_eec_jpn_export_share_1972"
	* m=1: 14 sic codes with foreign tariff data but no US export data
	* m=1: 2 sic codes with US export data but no foreign tariff data
keep if _m == 3
drop _merge

assert !missing(export_share_jpn) & !missing(export_share_can) & !missing(export_share_eec)

gen other_tariff_change = jpn_observed_change*export_share_jpn + ///
eec_observed_change*export_share_eec + can_observed_change*export_share_can

collapse eec_observed_change* jpn_observed_change* can_observed_change (sum) other_tariff_change*, by(sic)
save "processing/other_tariff_changes.dta", replace